** Session setup: start from a clean slate and configure Stata for a long batch run.
** Drop any data, programs, and stored results left in memory
clear all
** Turn off --more-- pauses so the script runs unattended
set more off
** NOTE(review): recent Stata versions manage memory automatically, so this
** setting may be ignored -- confirm whether it is still needed
set mem 10000000
** Raise the maximum matrix dimension
set matsize 10000
*version 13
** Interpret the commands that follow under Stata 12 rules
version 12

************************************************************** 
*** Build file for SECC Economic Census Data *****************
************************************************************** 

** Set file paths
** Runs paths.do from the folder stored in global $path_code, so $path_code must
** already be defined when this script starts. The code below relies on the
** globals $secc_raw, $secc, and $path -- presumably defined by paths.do; verify.
do "$path_code/paths.do"

**************************************************************
**************************************************************

** NOTE: These files are huge; there's an observation for every person in rural India!
** This script grabs only the most essential variables for RGGVY purposes.
** It's written to grab all folders in the master SECC_RAW data folder, so we can 
** iterate it several times and not have to load the entire SECC_RAW folder at once.

**************************************************************
**************************************************************

** TO START, download and unzip the 6 secc_raw_X.zip files (Zenodo links in README)
** Then, place the six unzipped folders (full of a combined 621 zipped folders) into "data/secc data/secc_raw"


** loop over these six mega folders
** Main build loop. For every mega folder under $secc_raw:
**   1. unzip each district-specific zip file,
**   2. walk the unzipped folder and each of its subfolders,
**   3. import every .xlsx file in a subfolder, drop unneeded variables, and
**      stack the files into a scratch temp.dta inside that subfolder,
**   4. run build_secc_clean.do on the stacked data and save one cleaned .dta
**      per subfolder under $secc/secc_dtas/st<st_code>/,
**   5. delete the unzipped folder before moving on to the next zip file.
** Requires globals $secc_raw, $secc, and $path to be defined beforehand.
foreach f1 in secc_raw_1 secc_raw_2 secc_raw_3 secc_raw_4 secc_raw_5 secc_raw_6 {
  
	** within each mega folder, identify district-specific zip folders
	cd "$secc_raw/`f1'"
	local zips : dir . files "*.zip"
	di `zips'

	** loop over folders to unzip
	foreach zipfile of local zips {
		
		di "`zipfile'"
		** quoted so that zip names containing spaces are handled;
		** -cap- keeps the run going if a single archive fails to unzip
		cap unzipfile "`zipfile'"

		** make a local of the unzipped folder name (should only be 1)
		local unzipped : dir "." dirs "*"
		
		** go into that folder. this "loop" is necessary to get the foldering to work
		foreach f2 in `unzipped' {
			
			** now we're inside the unzipped folder
			cd "$secc_raw/`f1'/`f2'"
			 
			** make a local of subfolders. This gets its own name so that `unzipped'
			** still holds the top-level folder for the rmdir cleanup below
			local subfolders : dir "." dirs "*"
	 
			** counter distinguishes the output files of subfolders that share
			** the same state and district codes
			local counter = 0
			
			** loop over subfolders
			foreach f3 of local subfolders {
			   
				di "`f3'"
				cd "$secc_raw/`f1'/`f2'/`f3'"
				
				** scratch file that accumulates this subfolder's excel sheets
				local tmpdta "$secc_raw/`f1'/`f2'/`f3'/temp.dta"
				
				** remove any leftover from an interrupted run so rows are not duplicated
				cap erase "`tmpdta'"
			   
				** now loop over excelfiles
				local allfiles : dir . files "*.xlsx"
				di `allfiles'
				foreach f4 of local allfiles {
					
					di "`f4'"
					
					** import excel file (quoted: file names may contain spaces)
					import excel using "`f4'", case(lower) clear firstrow 
				 
					** hard-coded corrections to the member serial number of three
					** specific records, identified by ahl_tin (done before ahl_tin is dropped)
					replace slnomember = "001" if ahl_tin == "15031500400470000015400023001"
					replace slnomember = "010" if ahl_tin == "37096000303700000080100009014"
					replace slnomember = "003" if ahl_tin == "43232000300960000016600013003"
					
					** discard unnecessary variables; -cap- because not every file has every variable
					** NOTE(review): with -foreach ... in-, "list" is treated as a variable
					** name to drop, not a keyword -- confirm whether that is intended
					foreach var in list ahl_tin npr_tin grampanchayatname name fathername pay_income_tax mothername dob religion ///
						education_other primitive_tribal_group rural_urban addressline* disabilitycode {
						cap drop `var'
					}
					
					** append all excel files. Only the first file finds no temp.dta;
					** any other append failure now stops the run instead of being
					** swallowed and then overwritten by the save below
					cap confirm file "`tmpdta'"
					if _rc == 0 {
						append using "`tmpdta'", force
					}
					save "`tmpdta'", replace
				}
				
				** load appended excel files (now in dta form)
				use "`tmpdta'", clear
				
				** execute cleaning code
				** NOTE(review): this uses $path while paths.do is loaded from $path_code --
				** confirm both globals are defined
				qui do "$path/code/build/build_secc_clean.do"  
				local st = st_code[1]
				local dt = dt_code[1]
				
				** make sure the state output folder exists (no-op if it already does)
				cap mkdir "$secc/secc_dtas/st`st'"
				
				** save cleaned dta
				save "$secc/secc_dtas/st`st'/secc_indiv_rural_st`st'_dt`dt'_`counter'.dta", replace
				local counter = `counter' + 1
			}
		}
		
		* come back out
		cd "$secc_raw/`f1'"
		
		* use the shell cmd to delete dir without it being empty
		* /s /q force, and prevent from asking for confirmation
		* (Windows cmd syntax; `unzipped' holds the quoted top-level folder name)
		!rmdir `unzipped' /s /q
	}
}
  
**************************************************************
**************************************************************
 
